x86 hvm: x2APIC emulation
author Keir Fraser <keir@xen.org>
Tue, 7 Dec 2010 18:24:12 +0000 (18:24 +0000)
committer Keir Fraser <keir@xen.org>
Tue, 7 Dec 2010 18:24:12 +0000 (18:24 +0000)
This patch enables Xen to handle x2APIC MSR accesses from HVM guests.
The MSR interface is faster than the MMIO interface because it avoids
having to decode MMIO accesses. Credit goes to Gleb Natapov, who
completed the equivalent work for KVM.

Tested with a 4-vCPU guest, both with and without x2APIC support.

From: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Keir Fraser <keir@xen.org>
tools/libxc/xc_cpufeature.h
tools/libxc/xc_cpuid_x86.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/vlapic.c
xen/include/asm-x86/hvm/hvm.h
xen/include/asm-x86/hvm/vlapic.h
xen/include/asm-x86/msr-index.h

index 96991a6813b3ed4cd4b4292810a920092be322c0..b1731d2f4af3234f44af9209a2f8db22d15555ed 100644 (file)
@@ -98,6 +98,7 @@
 #define X86_FEATURE_DCA                (4*32+18) /* Direct Cache Access */
 #define X86_FEATURE_SSE4_1     (4*32+19) /* Streaming SIMD Extensions 4.1 */
 #define X86_FEATURE_SSE4_2     (4*32+20) /* Streaming SIMD Extensions 4.2 */
+#define X86_FEATURE_X2APIC      (4*32+21) /* x2APIC */
 #define X86_FEATURE_POPCNT     (4*32+23) /* POPCNT instruction */
 #define X86_FEATURE_AES                (4*32+25) /* AES acceleration instructions */
 #define X86_FEATURE_XSAVE      (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
index 7d963a903e515e9beda71c7dd6fc63b81a0dc5a3..5df426ce6cf73bdad6042fe4062a1f256a52b847 100644 (file)
@@ -208,7 +208,8 @@ static void xc_cpuid_hvm_policy(
                      (bitmaskof(X86_FEATURE_AVX) |
                       bitmaskof(X86_FEATURE_XSAVE)) : 0);
 
-        regs[2] |= bitmaskof(X86_FEATURE_HYPERVISOR);
+        regs[2] |= (bitmaskof(X86_FEATURE_HYPERVISOR) |
+                    bitmaskof(X86_FEATURE_X2APIC));
 
         regs[3] &= (bitmaskof(X86_FEATURE_FPU) |
                     bitmaskof(X86_FEATURE_VME) |
index bb50b6113189211f8313b7e1d52d69f5300be880..544286a1ff9b567ea75362b2041d108d0a63c4dd 100644 (file)
@@ -2205,6 +2205,11 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
         *msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
 
+    case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff:
+        if ( hvm_x2apic_msr_read(v, msr, msr_content) )
+            goto gp_fault;
+        break;
+
     case MSR_IA32_CR_PAT:
         *msr_content = v->arch.hvm_vcpu.pat_cr;
         break;
@@ -2312,6 +2317,11 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
 
+    case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff:
+        if ( hvm_x2apic_msr_write(v, msr, msr_content) )
+            goto gp_fault;
+        break;
+
     case MSR_IA32_CR_PAT:
         if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
            goto gp_fault;
index f394518caf9e81ac40b2e9d22b57c856ba82a0fa..483588a99b6a41d93b5302f3312ae30215cd0032 100644 (file)
@@ -172,7 +172,13 @@ static uint32_t vlapic_get_ppr(struct vlapic *vlapic)
 static int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda)
 {
     int result = 0;
-    uint8_t logical_id;
+    uint32_t logical_id;
+
+    if ( vlapic_x2apic_mode(vlapic) )
+    {
+        logical_id = vlapic_get_reg(vlapic, APIC_LDR);
+        return !!(logical_id & mda);
+    }
 
     logical_id = GET_xAPIC_LOGICAL_ID(vlapic_get_reg(vlapic, APIC_LDR));
 
@@ -392,7 +398,7 @@ void vlapic_EOI_set(struct vlapic *vlapic)
 int vlapic_ipi(
     struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
 {
-    unsigned int dest       = GET_xAPIC_DEST_FIELD(icr_high);
+    unsigned int dest;
     unsigned int short_hand = icr_low & APIC_SHORT_MASK;
     unsigned int dest_mode  = !!(icr_low & APIC_DEST_MASK);
     struct vlapic *target;
@@ -401,6 +407,10 @@ int vlapic_ipi(
 
     HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "icr = 0x%08x:%08x", icr_high, icr_low);
 
+    dest = (vlapic_x2apic_mode(vlapic)
+            ? icr_high
+            : GET_xAPIC_DEST_FIELD(icr_high));
+
     if ( (icr_low & APIC_MODE_MASK) == APIC_DM_LOWEST )
     {
         target = vlapic_lowest_prio(vlapic_domain(vlapic), vlapic,
@@ -528,65 +538,42 @@ static int vlapic_read(
     return X86EMUL_OKAY;
 }
 
+int hvm_x2apic_msr_read(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    uint32_t low, high = 0, offset = (msr - MSR_IA32_APICBASE_MSR) << 4;
+
+    if ( !vlapic_x2apic_mode(vlapic) )
+        return 1;
+
+    vlapic_read_aligned(vlapic, offset, &low);
+    if ( offset == APIC_ICR )
+        vlapic_read_aligned(vlapic, APIC_ICR2, &high);
+
+    *msr_content = (((uint64_t)high) << 32) | low;
+    return 0;
+}
+
 static void vlapic_pt_cb(struct vcpu *v, void *data)
 {
     *(s_time_t *)data = hvm_get_guest_time(v);
 }
 
-static int vlapic_write(struct vcpu *v, unsigned long address,
-                        unsigned long len, unsigned long val)
+static int vlapic_reg_write(struct vcpu *v,
+                            unsigned int offset, unsigned long val)
 {
     struct vlapic *vlapic = vcpu_vlapic(v);
-    unsigned int offset = address - vlapic_base_address(vlapic);
     int rc = X86EMUL_OKAY;
 
-    if ( offset != 0xb0 )
-        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
-                    "offset 0x%x with length 0x%lx, and value is 0x%lx",
-                    offset, len, val);
-
-    /*
-     * According to the IA32 Manual, all accesses should be 32 bits.
-     * Some OSes do 8- or 16-byte accesses, however.
-     */
-    val = (uint32_t)val;
-    if ( len != 4 )
-    {
-        unsigned int tmp;
-        unsigned char alignment;
-
-        gdprintk(XENLOG_INFO, "Notice: Local APIC write with len = %lx\n",len);
-
-        alignment = offset & 0x3;
-        (void)vlapic_read_aligned(vlapic, offset & ~0x3, &tmp);
-
-        switch ( len )
-        {
-        case 1:
-            val = ((tmp & ~(0xff << (8*alignment))) |
-                   ((val & 0xff) << (8*alignment)));
-            break;
-
-        case 2:
-            if ( alignment & 1 )
-                goto unaligned_exit_and_crash;
-            val = ((tmp & ~(0xffff << (8*alignment))) |
-                   ((val & 0xffff) << (8*alignment)));
-            break;
-
-        default:
-            gdprintk(XENLOG_ERR, "Local APIC write with len = %lx, "
-                     "should be 4 instead\n", len);
-            goto exit_and_crash;
-        }
-    }
-    else if ( (offset & 0x3) != 0 )
-        goto unaligned_exit_and_crash;
-
-    offset &= ~0x3;
-
     switch ( offset )
     {
+    case APIC_ID:
+        if ( !vlapic_x2apic_mode(vlapic) )
+            vlapic_set_reg(vlapic, APIC_ID, val);
+        else
+            rc = X86EMUL_UNHANDLEABLE;
+        break;
+
     case APIC_TASKPRI:
         vlapic_set_reg(vlapic, APIC_TASKPRI, val & 0xff);
         break;
@@ -596,11 +583,17 @@ static int vlapic_write(struct vcpu *v, unsigned long address,
         break;
 
     case APIC_LDR:
-        vlapic_set_reg(vlapic, APIC_LDR, val & APIC_LDR_MASK);
+        if ( !vlapic_x2apic_mode(vlapic) )
+            vlapic_set_reg(vlapic, APIC_LDR, val & APIC_LDR_MASK);
+        else
+            rc = X86EMUL_UNHANDLEABLE;
         break;
 
     case APIC_DFR:
-        vlapic_set_reg(vlapic, APIC_DFR, val | 0x0FFFFFFF);
+        if ( !vlapic_x2apic_mode(vlapic) )
+            vlapic_set_reg(vlapic, APIC_DFR, val | 0x0FFFFFFF);
+        else
+            rc = X86EMUL_UNHANDLEABLE;
         break;
 
     case APIC_SPIV:
@@ -628,7 +621,19 @@ static int vlapic_write(struct vcpu *v, unsigned long address,
         break;
 
     case APIC_ESR:
-        /* Nothing to do. */
+        if ( vlapic_x2apic_mode(vlapic) && (val != 0) )
+        {
+            gdprintk(XENLOG_ERR, "Local APIC write ESR with non-zero %lx\n",
+                    val);
+            rc = X86EMUL_UNHANDLEABLE;
+        }
+        break;
+
+    case APIC_SELF_IPI:
+        if ( vlapic_x2apic_mode(vlapic) )
+            vlapic_reg_write(v, APIC_ICR, 0x40000 | (val & 0xff));
+        else
+            rc = X86EMUL_UNHANDLEABLE;
         break;
 
     case APIC_ICR:
@@ -639,7 +644,9 @@ static int vlapic_write(struct vcpu *v, unsigned long address,
         break;
 
     case APIC_ICR2:
-        vlapic_set_reg(vlapic, APIC_ICR2, val & 0xff000000);
+        if ( !vlapic_x2apic_mode(vlapic) )
+            val &= 0xff000000;
+        vlapic_set_reg(vlapic, APIC_ICR2, val);
         break;
 
     case APIC_LVTT:         /* LVT Timer Reg */
@@ -696,12 +703,67 @@ static int vlapic_write(struct vcpu *v, unsigned long address,
         break;
 
     default:
-        gdprintk(XENLOG_DEBUG,
-                 "Local APIC Write to read-only register 0x%x\n", offset);
         break;
     }
-
+    if (rc == X86EMUL_UNHANDLEABLE)
+        gdprintk(XENLOG_DEBUG,
+                "Local APIC Write wrong to register 0x%x\n", offset);
     return rc;
+}
+
+static int vlapic_write(struct vcpu *v, unsigned long address,
+                        unsigned long len, unsigned long val)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    unsigned int offset = address - vlapic_base_address(vlapic);
+    int rc = X86EMUL_OKAY;
+
+    if ( offset != 0xb0 )
+        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
+                    "offset 0x%x with length 0x%lx, and value is 0x%lx",
+                    offset, len, val);
+
+    /*
+     * According to the IA32 Manual, all accesses should be 32 bits.
+     * Some OSes do 8- or 16-byte accesses, however.
+     */
+    val = (uint32_t)val;
+    if ( len != 4 )
+    {
+        unsigned int tmp;
+        unsigned char alignment;
+
+        gdprintk(XENLOG_INFO, "Notice: Local APIC write with len = %lx\n",len);
+
+        alignment = offset & 0x3;
+        (void)vlapic_read_aligned(vlapic, offset & ~0x3, &tmp);
+
+        switch ( len )
+        {
+        case 1:
+            val = ((tmp & ~(0xff << (8*alignment))) |
+                   ((val & 0xff) << (8*alignment)));
+            break;
+
+        case 2:
+            if ( alignment & 1 )
+                goto unaligned_exit_and_crash;
+            val = ((tmp & ~(0xffff << (8*alignment))) |
+                   ((val & 0xffff) << (8*alignment)));
+            break;
+
+        default:
+            gdprintk(XENLOG_ERR, "Local APIC write with len = %lx, "
+                     "should be 4 instead\n", len);
+            goto exit_and_crash;
+        }
+    }
+    else if ( (offset & 0x3) != 0 )
+        goto unaligned_exit_and_crash;
+
+    offset &= ~0x3;
+
+    return vlapic_reg_write(v, offset, val);
 
  unaligned_exit_and_crash:
     gdprintk(XENLOG_ERR, "Unaligned LAPIC write len=0x%lx at offset=0x%x.\n",
@@ -711,6 +773,25 @@ static int vlapic_write(struct vcpu *v, unsigned long address,
     return rc;
 }
 
+int hvm_x2apic_msr_write(struct vcpu *v, unsigned int msr, uint64_t msr_content)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    uint32_t offset = (msr - MSR_IA32_APICBASE_MSR) << 4;
+    int rc;
+
+    if ( !vlapic_x2apic_mode(vlapic) )
+        return 1;
+
+    if ( offset == APIC_ICR )
+        if ( vlapic_reg_write(v, APIC_ICR2 , (uint32_t)(msr_content >> 32)) )
+            return 1;
+
+    rc = vlapic_reg_write(v, offset, (uint32_t)msr_content);
+
+    /* X86EMUL_RETRY for SIPI */
+    return ((rc != X86EMUL_OKAY) && (rc != X86EMUL_RETRY));
+}
+
 static int vlapic_range(struct vcpu *v, unsigned long addr)
 {
     struct vlapic *vlapic = vcpu_vlapic(v);
@@ -743,6 +824,13 @@ void vlapic_msr_set(struct vlapic *vlapic, uint64_t value)
 
     vlapic->hw.apic_base_msr = value;
 
+    if ( vlapic_x2apic_mode(vlapic) )
+    {
+        u32 id = vlapic_get_reg(vlapic, APIC_ID);
+        u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
+        vlapic_set_reg(vlapic, APIC_LDR, ldr);
+    }
+
     vmx_vlapic_msr_changed(vlapic_vcpu(vlapic));
 
     HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
index 6a98e51a3bf299c99ff35da937e80ac763237abe..4a7d379acdf1b67f526747739af0733098678f86 100644 (file)
@@ -364,4 +364,7 @@ bool_t hvm_hap_nested_page_fault(unsigned long gfn);
         ? (u32)__d->arch.incarnation : (u32)(v)->arch.hvm_vcpu.msr_tsc_aux; \
 })
 
+int hvm_x2apic_msr_read(struct vcpu *v, unsigned int msr, uint64_t *msr_content);
+int hvm_x2apic_msr_write(struct vcpu *v, unsigned int msr, uint64_t msr_content);
+
 #endif /* __ASM_X86_HVM_HVM_H__ */
index 905fff8daeef12e5028358cd318c725d2a5fb536..702ba542760896cc0dd3d8a8f5a1950e1c80a886 100644 (file)
@@ -51,6 +51,8 @@
 
 #define vlapic_base_address(vlapic)                             \
     ((vlapic)->hw.apic_base_msr & MSR_IA32_APICBASE_BASE)
+#define vlapic_x2apic_mode(vlapic)                              \
+    ((vlapic)->hw.apic_base_msr & MSR_IA32_APICBASE_EXTD)
 
 struct vlapic {
     struct hvm_hw_lapic      hw;
index aebbe1389bf7bd51aa8d22db8d46b04e7ccf2d9e..cc32932cd498c4f32a76e93882f1c89c2447224d 100644 (file)
 #define MSR_IA32_APICBASE_EXTD         (1<<10)
 #define MSR_IA32_APICBASE_ENABLE       (1<<11)
 #define MSR_IA32_APICBASE_BASE         (0xfffff<<12)
+#define MSR_IA32_APICBASE_MSR           0x800
 
 #define MSR_IA32_UCODE_WRITE           0x00000079
 #define MSR_IA32_UCODE_REV             0x0000008b